# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import pymongo
from scrapy.exceptions import DropItem


class DuplicationPipeline(object):
    """Filter out items that are unusable or whose uid was already seen.

    An item is dropped when it has no usable ``name``, when its ``uid`` has
    already passed through this pipeline instance, or when its ``visitors``
    value is falsy. Seen uids are kept in an in-memory set on the instance.
    """

    def __init__(self):
        # uids of every item that has been accepted by this instance so far.
        self.uid_set = set()

    def process_item(self, item, spider):
        """Validate ``item`` and remember its uid.

        Args:
            item: Mapping-like item; the keys ``realm_type``, ``name``,
                ``uid`` and ``visitors`` are read with ``item.get``.
            spider: Unused here.

        Returns:
            The same ``item`` object, unchanged, when it passes every check.

        Raises:
            DropItem: If ``name`` is falsy or the literal string ``"Null"``,
                if ``uid`` is already in ``uid_set``, or if ``visitors`` is
                falsy (which includes ``0`` and a missing key).
        """
        name = item.get("name")
        if not name or name == "Null":
            # Message: "no matching blogger". %-formatting is used instead of
            # "+" so that a missing realm_type (None) still results in a
            # DropItem rather than a TypeError from str concatenation.
            raise DropItem("%s------没有相关博主！" % (item.get("realm_type"),))

        uid = item.get("uid")
        if uid in self.uid_set:
            # Message: "duplicate element".
            raise DropItem("重复元素：%s" % name)

        if not item.get("visitors"):
            # Message: "blogger <name> has been banned".
            raise DropItem("博主<%s>已被封博！" % name)

        # Only items that passed every check are recorded as seen.
        self.uid_set.add(uid)
        return item


class MongoPipeline(object):
    """Write every item that reaches this pipeline into a MongoDB collection.

    Connection parameters are read from the crawler settings ``MONGO_URI``,
    ``MONGO_DB`` and ``MONGO_COLLECTION`` in ``from_crawler``; the class-level
    values below are the fallbacks used when a setting is absent.
    """

    # Defaults, also used when the class is instantiated directly.
    mongo_uri = "localhost:27017"
    mongo_db = "scinet"
    mongo_collection = "bloggers"

    # Set in open_spider; None until a connection has been opened.
    client = None
    db = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build a pipeline configured from ``crawler.settings``.

        The values are stored on the new instance rather than on the class,
        so creating a second pipeline with different settings does not
        change the configuration of the first one.

        Args:
            crawler: Object exposing ``settings.get(name, default)``.

        Returns:
            A new pipeline instance carrying ``mongo_uri``, ``mongo_db`` and
            ``mongo_collection``.
        """
        settings = crawler.settings
        pipeline = cls()
        pipeline.mongo_uri = settings.get("MONGO_URI", cls.mongo_uri)
        pipeline.mongo_db = settings.get("MONGO_DB", cls.mongo_db)
        pipeline.mongo_collection = settings.get(
            "MONGO_COLLECTION", cls.mongo_collection
        )
        return pipeline

    def open_spider(self, spider):
        """Create the MongoDB client and select the configured database."""
        self.client = pymongo.MongoClient(self.mongo_uri)
        self.db = self.client[self.mongo_db]

    def process_item(self, item, spider):
        """Insert a dict copy of ``item`` into the collection and return ``item``.

        ``insert_one`` is used because ``Collection.insert`` is deprecated
        since PyMongo 3.0 and no longer exists in PyMongo 4.
        """
        self.db[self.mongo_collection].insert_one(dict(item))
        return item

    def close_spider(self, spider):
        """Close the MongoDB client if one was opened."""
        if self.client is not None:
            self.client.close()
